packages = c('rgdal', 'spdep', 'tmap', 'sf', 'ggpubr', 'cluster', 'factoextra', 'NbClust', 'heatmaply', 'corrplot', 'psych', 'tidyverse')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}## Warning: package 'rgdal' was built under R version 4.0.3
## Warning: package 'sp' was built under R version 4.0.3
## Warning: package 'sf' was built under R version 4.0.3
## Warning: package 'factoextra' was built under R version 4.0.3
## Warning: package 'NbClust' was built under R version 4.0.3
## Warning: package 'tibble' was built under R version 4.0.3
## Warning: package 'readr' was built under R version 4.0.3
##
## -- Column specification --------------------------------------------------------
## cols(
## uen = col_character(),
## registration_year = col_double(),
## primary_ssic_code = col_character(),
## postal_code = col_double(),
## category = col_character(),
## primary_ssic_category_description = col_character(),
## X_coord = col_double(),
## Y_coord = col_double()
## )
corp_info_merged <- st_as_sf(corp_info_merged, coords = c('X_coord','Y_coord'), crs = 3414)
ssic2020 <- read_csv("data/aspatial/ssic2020.csv")##
## -- Column specification --------------------------------------------------------
## cols(
## `SSIC 2020` = col_character(),
## category = col_character(),
## primary_ssic_code = col_character()
## )
##
## -- Column specification --------------------------------------------------------
## cols(
## postal_code = col_double(),
## X_coord = col_double(),
## Y_coord = col_double()
## )
mpsz <- st_read(dsn = "data/geospatial", layer="MP14_SUBZONE_WEB_PL") %>%
filter(!(PLN_AREA_N %in% c("NORTH-EASTERN ISLANDS",
"CENTRAL WATER CATCHMENT",
"CHANGI BAY",
"MARINA SOUTH",
"SIMPANG",
"SOUTHERN ISLANDS",
"STRAITS VIEW",
"TENGAH")))## Reading layer `MP14_SUBZONE_WEB_PL' from data source `D:\GitHub\is415-proj\data\geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 323 features and 15 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
## projected CRS: SVY21
## Simple feature collection with 311 features and 15 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 15748.72 xmax: 50293.96 ymax: 50256.33
## projected CRS: SVY21
## First 10 features:
## OBJECTID SUBZONE_NO SUBZONE_N SUBZONE_C CA_IND PLN_AREA_N
## 1 2 1 PEARL'S HILL OTSZ01 Y OUTRAM
## 2 3 3 BOAT QUAY SRSZ03 Y SINGAPORE RIVER
## 3 4 8 HENDERSON HILL BMSZ08 N BUKIT MERAH
## 4 5 3 REDHILL BMSZ03 N BUKIT MERAH
## 5 6 7 ALEXANDRA HILL BMSZ07 N BUKIT MERAH
## 6 7 9 BUKIT HO SWEE BMSZ09 N BUKIT MERAH
## 7 8 2 CLARKE QUAY SRSZ02 Y SINGAPORE RIVER
## 8 9 13 PASIR PANJANG 1 QTSZ13 N QUEENSTOWN
## 9 10 7 QUEENSWAY QTSZ07 N QUEENSTOWN
## 10 11 12 KENT RIDGE QTSZ12 N QUEENSTOWN
## PLN_AREA_C REGION_N REGION_C INC_CRC FMEL_UPD_D X_ADDR
## 1 OT CENTRAL REGION CR 8C7149B9EB32EEFC 2014-12-05 28679.06
## 2 SR CENTRAL REGION CR C35FEFF02B13E0E5 2014-12-05 29654.96
## 3 BM CENTRAL REGION CR 3775D82C5DDBEFBD 2014-12-05 26782.83
## 4 BM CENTRAL REGION CR 85D9ABEF0A40678F 2014-12-05 26201.96
## 5 BM CENTRAL REGION CR 9D286521EF5E3B59 2014-12-05 25358.82
## 6 BM CENTRAL REGION CR 7839A8577144EFE2 2014-12-05 27680.06
## 7 SR CENTRAL REGION CR 48661DC0FBA09F7A 2014-12-05 29253.21
## 8 QT CENTRAL REGION CR 1F721290C421BFAB 2014-12-05 22077.34
## 9 QT CENTRAL REGION CR 3580D2AFFBEE914C 2014-12-05 24168.31
## 10 QT CENTRAL REGION CR 601BA309A1AAC731 2014-12-05 23464.84
## Y_ADDR SHAPE_Leng SHAPE_Area geometry
## 1 29782.05 3506.107 559816.2 MULTIPOLYGON (((29092.28 30...
## 2 29974.66 1740.926 160807.5 MULTIPOLYGON (((29932.33 29...
## 3 29933.77 3313.625 595428.9 MULTIPOLYGON (((27131.28 30...
## 4 30005.70 2825.594 387429.4 MULTIPOLYGON (((26451.03 30...
## 5 29991.38 4428.913 1030378.8 MULTIPOLYGON (((25899.7 297...
## 6 30230.86 3275.312 551732.0 MULTIPOLYGON (((27746.95 30...
## 7 30222.86 2208.619 290184.7 MULTIPOLYGON (((29351.26 29...
## 8 29893.78 6571.323 1084792.3 MULTIPOLYGON (((20996.49 30...
## 9 30104.18 3454.239 631644.3 MULTIPOLYGON (((24472.11 29...
## 10 29725.37 7439.548 1826848.6 MULTIPOLYGON (((23332.77 30...
## Coordinate Reference System:
## User input: SVY21
## wkt:
## PROJCRS["SVY21",
## BASEGEOGCRS["SVY21[WGS84]",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ID["EPSG",6326]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["Degree",0.0174532925199433]]],
## CONVERSION["unnamed",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]]
## Coordinate Reference System:
## User input: EPSG:3414
## wkt:
## PROJCRS["SVY21 / Singapore TM",
## BASEGEOGCRS["SVY21",
## DATUM["SVY21",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4757]],
## CONVERSION["Singapore Transverse Mercator",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["northing (N)",north,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["easting (E)",east,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["unknown"],
## AREA["Singapore"],
## BBOX[1.13,103.59,1.47,104.07]],
## ID["EPSG",3414]]
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in CRS definition
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum SVY21 in CRS definition
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in CRS definition
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum SVY21 in CRS definition
## integer(0)
## [1] "G" "F" "H" "C" "N" "I" "S" "M" "Q" "L" "J" "R" "P" "E" "K" "D" "A" "O"
for (category_id in unique(corp_info_merged$category)) {
corp_with_category <- corp_info_merged %>%
filter(category == category_id)
mpsz_3414[, paste0("Category ", category_id)]<- lengths(st_intersects(mpsz_3414, corp_with_category))
}## [1] 362.1286
mpsz_3414 <- mpsz_3414 %>%
mutate(`Cat G Prop` = case_when(Total != 0 ~ `Category G`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat F Prop` = case_when(Total != 0 ~ `Category F`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat H Prop` = case_when(Total != 0 ~ `Category H`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat C Prop` = case_when(Total != 0 ~ `Category C`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat N Prop` = case_when(Total != 0 ~ `Category N`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat I Prop` = case_when(Total != 0 ~ `Category I`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat S Prop` = case_when(Total != 0 ~ `Category S`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat M Prop` = case_when(Total != 0 ~ `Category M`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat Q Prop` = case_when(Total != 0 ~ `Category Q`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat L Prop` = case_when(Total != 0 ~ `Category L`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat J Prop` = case_when(Total != 0 ~ `Category J`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat R Prop` = case_when(Total != 0 ~ `Category R`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat P Prop` = case_when(Total != 0 ~ `Category P`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat E Prop` = case_when(Total != 0 ~ `Category E`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat K Prop` = case_when(Total != 0 ~ `Category K`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat D Prop` = case_when(Total != 0 ~ `Category D`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat A Prop` = case_when(Total != 0 ~ `Category A`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat O Prop` = case_when(Total != 0 ~ `Category O`/Total * 1000,
Total == 0 ~ 0))ggplot(data=mpsz_3414, aes(x=`Category M`)) +
geom_histogram(bins=20, color="black", fill="light blue")ggplot(data=mpsz_3414, aes(x=`Cat M Prop`)) +
geom_histogram(bins=20, color="black", fill="light blue")cat_g <- ggplot(data=mpsz_3414,
aes(x= `Cat G Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_f <- ggplot(data=mpsz_3414,
aes(x= `Cat F Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_h <- ggplot(data=mpsz_3414,
aes(x= `Cat H Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_c <- ggplot(data=mpsz_3414,
aes(x= `Cat C Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_n <- ggplot(data=mpsz_3414,
aes(x= `Cat N Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_i <- ggplot(data=mpsz_3414,
aes(x= `Cat I Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")cat_s <- ggplot(data=mpsz_3414,
aes(x= `Cat S Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_m <- ggplot(data=mpsz_3414,
aes(x= `Cat M Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_q <- ggplot(data=mpsz_3414,
aes(x= `Cat Q Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_l <- ggplot(data=mpsz_3414,
aes(x= `Cat L Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_j <- ggplot(data=mpsz_3414,
aes(x= `Cat J Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_r <- ggplot(data=mpsz_3414,
aes(x= `Cat R Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")cat_p <- ggplot(data=mpsz_3414,
aes(x= `Cat P Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_e <- ggplot(data=mpsz_3414,
aes(x= `Cat E Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_k <- ggplot(data=mpsz_3414,
aes(x= `Cat K Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_d <- ggplot(data=mpsz_3414,
aes(x= `Cat D Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_a <- ggplot(data=mpsz_3414,
aes(x= `Cat A Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_o <- ggplot(data=mpsz_3414,
aes(x= `Cat O Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")cluster_vars <- mpsz_3414_derived %>%
select("SUBZONE_N", ends_with("Prop"), -`Cat I Prop`, -`Cat L Prop`, -`Cat H Prop`, -`Cat P Prop`, -`Cat N Prop`, -`Cat J Prop`, -`Cat M Prop`, -`Cat F Prop`)
head(cluster_vars,10)## SUBZONE_N Cat G Prop Cat C Prop Cat S Prop Cat Q Prop Cat R Prop
## 1 PEARL'S HILL 232.77910 24.94062 20.19002 20.190024 9.501188
## 2 BOAT QUAY 0.00000 0.00000 0.00000 0.000000 0.000000
## 3 HENDERSON HILL 316.32653 10.20408 30.61224 40.816327 30.612245
## 4 REDHILL 200.00000 0.00000 75.00000 16.666667 16.666667
## 5 ALEXANDRA HILL 221.12211 36.30363 51.70517 11.001100 6.600660
## 6 BUKIT HO SWEE 230.28391 22.08202 59.93691 34.700315 50.473186
## 7 CLARKE QUAY 0.00000 0.00000 40.00000 0.000000 400.000000
## 8 PASIR PANJANG 1 204.08163 35.71429 51.02041 30.612245 56.122449
## 9 QUEENSWAY 555.55556 15.87302 79.36508 31.746032 47.619048
## 10 KENT RIDGE 58.82353 78.43137 0.00000 9.803922 0.000000
## Cat E Prop Cat K Prop Cat D Prop Cat A Prop Cat O Prop
## 1 0.000000 140.14252 1.187648 2.375297 0.000000
## 2 0.000000 0.00000 0.000000 0.000000 0.000000
## 3 10.204082 30.61224 0.000000 10.204082 0.000000
## 4 0.000000 75.00000 0.000000 0.000000 0.000000
## 5 0.000000 104.51045 2.200220 7.700770 0.000000
## 6 0.000000 66.24606 0.000000 6.309148 0.000000
## 7 0.000000 40.00000 0.000000 0.000000 0.000000
## 8 5.102041 56.12245 0.000000 0.000000 5.102041
## 9 0.000000 15.87302 0.000000 0.000000 0.000000
## 10 0.000000 147.05882 0.000000 19.607843 0.000000
## SUBZONE_N Cat G Prop Cat C Prop Cat S Prop Cat Q Prop
## PEARL'S HILL PEARL'S HILL 232.77910 24.94062 20.19002 20.190024
## BOAT QUAY BOAT QUAY 0.00000 0.00000 0.00000 0.000000
## HENDERSON HILL HENDERSON HILL 316.32653 10.20408 30.61224 40.816327
## REDHILL REDHILL 200.00000 0.00000 75.00000 16.666667
## ALEXANDRA HILL ALEXANDRA HILL 221.12211 36.30363 51.70517 11.001100
## BUKIT HO SWEE BUKIT HO SWEE 230.28391 22.08202 59.93691 34.700315
## CLARKE QUAY CLARKE QUAY 0.00000 0.00000 40.00000 0.000000
## PASIR PANJANG 1 PASIR PANJANG 1 204.08163 35.71429 51.02041 30.612245
## QUEENSWAY QUEENSWAY 555.55556 15.87302 79.36508 31.746032
## KENT RIDGE KENT RIDGE 58.82353 78.43137 0.00000 9.803922
## Cat R Prop Cat E Prop Cat K Prop Cat D Prop Cat A Prop
## PEARL'S HILL 9.501188 0.000000 140.14252 1.187648 2.375297
## BOAT QUAY 0.000000 0.000000 0.00000 0.000000 0.000000
## HENDERSON HILL 30.612245 10.204082 30.61224 0.000000 10.204082
## REDHILL 16.666667 0.000000 75.00000 0.000000 0.000000
## ALEXANDRA HILL 6.600660 0.000000 104.51045 2.200220 7.700770
## BUKIT HO SWEE 50.473186 0.000000 66.24606 0.000000 6.309148
## CLARKE QUAY 400.000000 0.000000 40.00000 0.000000 0.000000
## PASIR PANJANG 1 56.122449 5.102041 56.12245 0.000000 0.000000
## QUEENSWAY 47.619048 0.000000 15.87302 0.000000 0.000000
## KENT RIDGE 0.000000 0.000000 147.05882 0.000000 19.607843
## Cat O Prop
## PEARL'S HILL 0.000000
## BOAT QUAY 0.000000
## HENDERSON HILL 0.000000
## REDHILL 0.000000
## ALEXANDRA HILL 0.000000
## BUKIT HO SWEE 0.000000
## CLARKE QUAY 0.000000
## PASIR PANJANG 1 5.102041
## QUEENSWAY 0.000000
## KENT RIDGE 0.000000
sg_business <- select(cluster_vars, c(2:11))
row.names(sg_business) <- cluster_vars$`SUBZONE_N`
head(sg_business, 10)## Cat G Prop Cat C Prop Cat S Prop Cat Q Prop Cat R Prop
## PEARL'S HILL 232.77910 24.94062 20.19002 20.190024 9.501188
## BOAT QUAY 0.00000 0.00000 0.00000 0.000000 0.000000
## HENDERSON HILL 316.32653 10.20408 30.61224 40.816327 30.612245
## REDHILL 200.00000 0.00000 75.00000 16.666667 16.666667
## ALEXANDRA HILL 221.12211 36.30363 51.70517 11.001100 6.600660
## BUKIT HO SWEE 230.28391 22.08202 59.93691 34.700315 50.473186
## CLARKE QUAY 0.00000 0.00000 40.00000 0.000000 400.000000
## PASIR PANJANG 1 204.08163 35.71429 51.02041 30.612245 56.122449
## QUEENSWAY 555.55556 15.87302 79.36508 31.746032 47.619048
## KENT RIDGE 58.82353 78.43137 0.00000 9.803922 0.000000
## Cat E Prop Cat K Prop Cat D Prop Cat A Prop Cat O Prop
## PEARL'S HILL 0.000000 140.14252 1.187648 2.375297 0.000000
## BOAT QUAY 0.000000 0.00000 0.000000 0.000000 0.000000
## HENDERSON HILL 10.204082 30.61224 0.000000 10.204082 0.000000
## REDHILL 0.000000 75.00000 0.000000 0.000000 0.000000
## ALEXANDRA HILL 0.000000 104.51045 2.200220 7.700770 0.000000
## BUKIT HO SWEE 0.000000 66.24606 0.000000 6.309148 0.000000
## CLARKE QUAY 0.000000 40.00000 0.000000 0.000000 0.000000
## PASIR PANJANG 1 5.102041 56.12245 0.000000 0.000000 5.102041
## QUEENSWAY 0.000000 15.87302 0.000000 0.000000 0.000000
## KENT RIDGE 0.000000 147.05882 0.000000 19.607843 0.000000
## Cat G Prop Cat C Prop Cat S Prop Cat Q Prop
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.3298 1st Qu.:0.01735 1st Qu.:0.01758 1st Qu.:0.00000
## Median :0.4500 Median :0.03810 Median :0.04400 Median :0.01569
## Mean :0.3944 Mean :0.05751 Mean :0.05124 Mean :0.02767
## 3rd Qu.:0.5027 3rd Qu.:0.05700 3rd Qu.:0.06573 3rd Qu.:0.03130
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## Cat R Prop Cat E Prop Cat K Prop Cat D Prop
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.03558 1st Qu.:0.00000
## Median :0.02358 Median :0.00000 Median :0.09278 Median :0.00000
## Mean :0.03392 Mean :0.04249 Mean :0.13440 Mean :0.01169
## 3rd Qu.:0.04049 3rd Qu.:0.03068 3rd Qu.:0.16699 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## Cat A Prop Cat O Prop
## Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000
## Mean :0.01913 Mean :0.01173
## 3rd Qu.:0.01499 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000
## Cat G Prop Cat C Prop Cat S Prop Cat Q Prop
## Min. :-1.9781 Min. :-0.667310 Min. :-0.6950 Min. :-0.33548
## 1st Qu.:-0.3239 1st Qu.:-0.465962 1st Qu.:-0.4565 1st Qu.:-0.33548
## Median : 0.2788 Median :-0.225306 Median :-0.0982 Median :-0.14531
## Mean : 0.0000 Mean : 0.000000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.5429 3rd Qu.:-0.005913 3rd Qu.: 0.1966 3rd Qu.: 0.04402
## Max. : 3.0373 Max. :10.935294 Max. :12.8682 Max. :11.78784
## Cat R Prop Cat E Prop Cat K Prop Cat D Prop
## Min. :-0.50735 Min. :-0.3607 Min. :-0.8167 Min. :-0.1847
## 1st Qu.:-0.50735 1st Qu.:-0.3607 1st Qu.:-0.6005 1st Qu.:-0.1847
## Median :-0.15457 Median :-0.3607 Median :-0.2529 Median :-0.1847
## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.09824 3rd Qu.:-0.1002 3rd Qu.: 0.1981 3rd Qu.:-0.1847
## Max. :14.45027 Max. : 8.1280 Max. : 5.2600 Max. :15.6156
## Cat A Prop Cat O Prop
## Min. :-0.24228 Min. :-0.1296
## 1st Qu.:-0.24228 1st Qu.:-0.1296
## Median :-0.24228 Median :-0.1296
## Mean : 0.00000 Mean : 0.0000
## 3rd Qu.:-0.05243 3rd Qu.:-0.1296
## Max. :12.41937 Max. :10.9190
r <- ggplot(data=mpsz_3414_derived,
aes(x= `Cat G Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
sg_business_minmax_df <- as.data.frame(sg_business.minmax)
s <- ggplot(data=sg_business_minmax_df,
aes(x=`Cat G Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue") +
ggtitle("Min-Max Standardisation")
sg_business_zscore_df <- as.data.frame(sg_business.zscore)
z <- ggplot(data=sg_business_zscore_df,
aes(x=`Cat G Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue") +
ggtitle("Z-score Standardisation")
ggarrange(r, s, z,
ncol = 3,
nrow = 1)m <- c( "average", "single", "complete", "ward")
names(m) <- c( "average", "single", "complete", "ward")
ac <- function(x) {
agnes(sg_business, method = x)$ac
}
map_dbl(m, ac)## average single complete ward
## 0.9506336 0.9528419 0.9625796 0.9765662
With reference to the output above, we can see that Ward’s method provides the strongest clustering structure among the four methods assessed. Hence, in the subsequent analysis, only Ward’s method will be used.
set.seed(54321)
gap_stat <- clusGap(sg_business, FUN = hcut, nstart = 25, K.max = 25, B = 50)
# Print the result
print(gap_stat, method = "firstmax")## Clustering Gap statistic ["clusGap"] from call:
## clusGap(x = sg_business, FUNcluster = hcut, K.max = 25, B = 50, nstart = 25)
## B=50 simulated reference sets, k = 1..25; spaceH0="scaledPCA"
## --> Number of clusters (method 'firstmax'): 2
## logW E.logW gap SE.sim
## [1,] 9.684116 11.05891 1.374792 0.009404075
## [2,] 9.454766 10.95801 1.503248 0.014234872
## [3,] 9.383786 10.88580 1.502018 0.013384406
## [4,] 9.242075 10.83298 1.590905 0.013008459
## [5,] 9.200741 10.79124 1.590501 0.013121701
## [6,] 9.089472 10.75477 1.665294 0.013452089
## [7,] 9.027474 10.72292 1.695445 0.013054442
## [8,] 8.954053 10.69291 1.738861 0.012935398
## [9,] 8.917038 10.66569 1.748655 0.012722686
## [10,] 8.875763 10.64031 1.764545 0.012340370
## [11,] 8.849896 10.61687 1.766976 0.012043180
## [12,] 8.827149 10.59440 1.767253 0.011937704
## [13,] 8.771298 10.57279 1.801489 0.011870034
## [14,] 8.726574 10.55274 1.826166 0.012276322
## [15,] 8.705511 10.53376 1.828245 0.012444394
## [16,] 8.683364 10.51549 1.832128 0.012348166
## [17,] 8.634498 10.49804 1.863547 0.012293103
## [18,] 8.608160 10.48126 1.873099 0.012298986
## [19,] 8.590848 10.46520 1.874352 0.012186732
## [20,] 8.551617 10.44988 1.898260 0.012106799
## [21,] 8.529918 10.43501 1.905088 0.011865180
## [22,] 8.512432 10.42046 1.908028 0.011846658
## [23,] 8.487536 10.40642 1.918884 0.011718945
## [24,] 8.469926 10.39259 1.922663 0.011585602
## [25,] 8.451780 10.37911 1.927334 0.011552423
heatmaply(normalize(sg_business_mat),
Colv=NA,
dist_method = "euclidean",
hclust_method = "ward.D",
seriate = "OLO",
colors = Blues,
k_row = 10,
margins = c(NA,200,60,NA),
fontsize_row = 4,
fontsize_col = 5,
main="Geographic Segmentation of Singapore by Business Prominence",
xlab = "Business Prominence",
ylab = "Singapore's Planning Areas"
)## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in CRS definition
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum SVY21 in CRS definition
## Neighbour list object:
## Number of regions: 311
## Number of nonzero links: 1838
## Percentage nonzero weights: 1.900311
## Average number of links: 5.909968
## 2 regions with no links:
## 16 17
## Link number distribution:
##
## 0 1 2 3 4 5 6 7 8 9 10 11 13
## 2 1 4 13 32 78 80 48 32 16 1 3 1
## 1 least connected region:
## 15 with 1 link
## 1 most connected region:
## 279 with 13 links
## [1] "SUDONG"
## [1] "SEMAKAU"
## Neighbour list object:
## Number of regions: 309
## Number of nonzero links: 1838
## Percentage nonzero weights: 1.92499
## Average number of links: 5.94822
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11 13
## 1 4 13 32 78 80 48 32 16 1 3 1
## 1 least connected region:
## 15 with 1 link
## 1 most connected region:
## 279 with 13 links
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 309
## Number of nonzero links: 1838
## Percentage nonzero weights: 1.92499
## Average number of links: 5.94822
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11 13
## 1 4 13 32 78 80 48 32 16 1 3 1
## 1 least connected region:
## 15 with 1 link
## 1 most connected region:
## 279 with 13 links
##
## Weights style: B
## Weights constants summary:
## n nn S0 S1 S2
## B 309 95481 350807 250427155 2664023836
## [1] "mst" "matrix"
## [1] 308 3
## [,1] [,2] [,3]
## [1,] 8 50 75.92565
## [2,] 50 122 44.42275
## [3,] 122 74 50.92590
## [4,] 50 24 57.06020
## [5,] 8 45 106.64555
## [6,] 45 72 82.89484
plot(mpsz_sp, border=gray(.5))
plot.mst(sg.mst, coordinates(mpsz_sp),
col="blue", cex.lab=0.7, cex.circles=0.005, add=TRUE)## List of 8
## $ groups : num [1:309] 1 2 1 1 1 1 2 1 1 3 ...
## $ edges.groups:List of 10
## ..$ :List of 3
## .. ..$ node: num [1:268] 4 284 116 201 87 157 258 100 160 1 ...
## .. ..$ edge: num [1:267, 1:3] 284 116 201 87 258 157 100 160 1 258 ...
## .. ..$ ssw : num 29136
## ..$ :List of 3
## .. ..$ node: num [1:12] 12 39 37 32 2 21 20 7 56 33 ...
## .. ..$ edge: num [1:11, 1:3] 2 32 20 37 21 39 32 32 32 39 ...
## .. ..$ ssw : num 1443
## ..$ :List of 3
## .. ..$ node: num [1:12] 10 18 23 16 22 19 57 17 27 34 ...
## .. ..$ edge: num [1:11, 1:3] 18 16 10 16 16 16 23 22 23 18 ...
## .. ..$ ssw : num 1050
## ..$ :List of 3
## .. ..$ node: num 250
## .. ..$ edge: num[0 , 1:3]
## .. ..$ ssw : num 0
## ..$ :List of 3
## .. ..$ node: num 30
## .. ..$ edge: num[0 , 1:3]
## .. ..$ ssw : num 0
## ..$ :List of 3
## .. ..$ node: num 274
## .. ..$ edge: num[0 , 1:3]
## .. ..$ ssw : num 0
## ..$ :List of 3
## .. ..$ node: num 175
## .. ..$ edge: num[0 , 1:3]
## .. ..$ ssw : num 0
## ..$ :List of 3
## .. ..$ node: num [1:8] 47 54 49 14 41 46 77 13
## .. ..$ edge: num [1:7, 1:3] 54 47 54 47 49 49 49 49 54 46 ...
## .. ..$ ssw : num 1072
## ..$ :List of 3
## .. ..$ node: num [1:3] 53 42 48
## .. ..$ edge: num [1:2, 1:3] 53 53 42 48 71.4 ...
## .. ..$ ssw : num 223
## ..$ :List of 3
## .. ..$ node: num [1:2] 95 58
## .. ..$ edge: num [1, 1:3] 95 58 0
## .. ..$ ssw : num 0
## $ not.prune : NULL
## $ candidates : int [1:10] 1 2 3 4 5 6 7 8 9 10
## $ ssto : num 42810
## $ ssw : num [1:10] 42810 40806 39007 37938 36904 ...
## $ crit : num [1:2] 1 Inf
## $ vec.crit : num [1:309] 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "class")= chr "skater"
## clustergrps
## 1 2 3 4 5 6 7 8 9 10
## 268 12 12 1 1 1 1 8 3 2
plot(mpsz_sp, border=gray(.5))
plot(skaterclust, coordinates(mpsz_3414_sp), cex.lab=.7,
groups.colors=c("red","green","blue", "brown", "pink"), cex.circles=0.005, add=TRUE)## Warning in segments(coords[id1, 1], coords[id1, 2], coords[id2, 1],
## coords[id2, : "add" is not a graphical parameter
## Warning in segments(coords[id1, 1], coords[id1, 2], coords[id2, 1],
## coords[id2, : "add" is not a graphical parameter
## Warning in segments(coords[id1, 1], coords[id1, 2], coords[id2, 1],
## coords[id2, : "add" is not a graphical parameter
## Warning in segments(coords[id1, 1], coords[id1, 2], coords[id2, 1],
## coords[id2, : "add" is not a graphical parameter
## Warning in segments(coords[id1, 1], coords[id1, 2], coords[id2, 1],
## coords[id2, : "add" is not a graphical parameter
## Warning in segments(coords[id1, 1], coords[id1, 2], coords[id2, 1],
## coords[id2, : "add" is not a graphical parameter
groups_mat <- as.matrix(skaterclust$groups)
sg_biz_cluster <- sg_biz_cluster %>%
filter(!SUBZONE_N %in% c("SUDONG", "SEMAKAU"))
sg_biz_spatialcluster <- cbind(sg_biz_cluster, as.factor(groups_mat)) %>%
rename(`SP_CLUSTER`=`as.factor.groups_mat.`)
qtm(sg_biz_spatialcluster, "SP_CLUSTER")